package org.huangrui.spark.java.core.rdd.operate.action;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

import java.util.Arrays;
import java.util.List;
import java.util.Map;

/**
 * Demonstrates the countByKey action, which counts the elements of a pair RDD per key
 * and returns the result to the driver as a java.util.Map.
 *
 * @Author hr
 * @Create 2024-10-18 8:47
 */
public class Spark03_Operate_Action_1 {
    public static void main(String[] args) {
        final SparkConf conf = new SparkConf().setMaster("local[*]").setAppName("spark");
        final JavaSparkContext jsc = new JavaSparkContext(conf);
        final List<Integer> nums = Arrays.asList(4, 2, 3, 1);
        final JavaRDD<Integer> rdd = jsc.parallelize(nums, 2);
        // TODO countByKey: count the number of elements for each key
        /*
          Source data        : 4, 2, 3, 1
          After mapToPair    : (a, 4), (a, 2), (a, 3), (a, 1)
          countByKey result  : {a=4}   // the key "a" appears 4 times
         */
        Map<String, Long> map = rdd.mapToPair(x -> new Tuple2<>("a", x)).countByKey();
        System.out.println("map = " + map);
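
        // For comparison, a rough equivalent built from transformations plus collectAsMap
        // (a sketch only; "mapViaReduce" is an illustrative variable name, not from the original code):
        Map<String, Long> mapViaReduce = rdd.mapToPair(x -> new Tuple2<>("a", x))
                .mapValues(v -> 1L)          // replace each value with a count of 1
                .reduceByKey(Long::sum)      // sum the counts per key
                .collectAsMap();             // bring the per-key counts back to the driver
        System.out.println("mapViaReduce = " + mapViaReduce);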
        jsc.close();
    }
}
